#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time    : 2020/2/22 11:04
# @Author  : HaoXuan
# @Email   : 879316712@qq.com
# @File    : 采集IP代理.py
# @Software: PyCharm
'''
使用requests采集：https://www.xicidaili.com/nn/1
采集IP地址和端口
'''
import requests

def getOnePageHtml():
    '''
    Fetch the first page of the xicidaili proxy list and return its HTML.

    The request carries browser-like headers because, per the original
    author's note, the site answers 503 to clients that do not look like
    a browser. The HTML is printed to stdout and also returned to the caller.
    The HTTP status code is not checked, so an error page is printed and
    returned as-is.

    :return: the response body decoded as text (``response.text``)
    :raises requests.exceptions.RequestException: on connection failure
        or when the server does not answer within the timeout
    '''
    url = 'https://www.xicidaili.com/nn/1'
    headers = {
        # The header name must be spelled exactly "User-Agent". The previous
        # key 'User - Agent' was sent as an unrelated header, so requests
        # still used its own default User-Agent and the browser emulation
        # never took effect. The value is the standard Chrome 75 string with
        # the stray spaces around "/" removed.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36',
        # NOTE(review): this is a session cookie captured from a browser and
        # is kept byte-for-byte. It appears to have picked up extra spaces
        # (around "=", inside "%3D" and "--") and is presumably stale --
        # confirm whether the site needs it at all.
        "Cookie": "_free_proxy_session = BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJTI2ODA2ODdmN2Q5MzgxNGU0MDMxYzVlMTViNmUyN2VmBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMWc1eHhQaXpqZ29iamYwdEM2RDFGblpaMDhRRWx3STNhTW5ISEtLeTVkZXM9BjsARg % 3D % 3D - -8a57d2150f13e60dfef048288d67c864c2108ffc;Hm_lvt_0cf76c77469e965d2957f0553e6ecf59 = 1582340738;Hm_lpvt_0cf76c77469e965d2957f0553e6ecf59 = 1582349221"
    }
    # Equivalent to opening the URL in a browser; the returned page is the
    # response. A timeout keeps the script from hanging forever if the
    # server never answers.
    response = requests.get(url, headers=headers, timeout=10)
    html = response.text
    print(html)  # print the HTML text
    return html


# Run the scrape only when this file is executed as a script, so that
# importing the module does not trigger a network request.
if __name__ == '__main__':
    getOnePageHtml()
